细胞器组装 -- 三代数据 -- Canu v2.3

1 安装(二选一)

mamba安装

# Create a dedicated environment named pb-assembly and switch into it.
mamba create -n pb-assembly
mamba activate pb-assembly
# List the Canu builds available from the configured channels.
mamba search canu
# Install Canu pinned to version 2.3; -y skips the confirmation prompt.
mamba install -y canu=2.3

下载解压

# Fetch the prebuilt Linux release tarball from GitHub
# (-L follow redirects, -R keep the remote timestamp, -O keep the remote file name).
curl -LRO https://github.com/marbl/canu/releases/download/v2.3/canu-2.3.Linux-amd64.tar.xz
tar -xJf canu-2.3.*.tar.xz # fails on this server: a dependency is missing

在 Windows 下先解压 .tar.xz,重新打包成 .tar.gz 后上传到服务器
# Workaround for the failing .tar.xz extraction: repackage the unpacked
# directory as a gzip tarball, then unpack that on the server.
tar -czvf  canu-2.3.Linux-amd64.tar.gz  canu-2.3.Linux-amd64 # create the .tar.gz archive
tar -zxvf canu-2.3.Linux-amd64.tar.gz # extract; run this in the directory that holds the archive

# Make the unpacked Canu release executable.
cd /share/nas1/yuj/software/canu-2.3.Linux-amd64/canu-2.3/bin/
# Add the execute bit on the canu launcher for all users.
chmod a+x canu
# Then set mode 755 recursively on the whole release tree.
chmod -R  755  /share/nas1/yuj/software/canu-2.3.Linux-amd64/canu-2.3/

2 使用

2.1 组装

## V2.3
# Assemble with the standalone Canu v2.3 binary in HiFi mode.
#   -p sample            prefix for output file names
#   -d 00_assemble_canu  output directory
#   genomeSize=160k      estimated genome size
#   useGrid=false        run locally, maxThreads=8 caps the thread count
# The input reads file (map_gene.fa here) may be in FASTA or FASTQ format.
# Status: not usable on this server -- the installed dependency libraries are too old.
/share/nas1/yuj/software/canu-2.3.Linux-amd64/canu-2.3/bin/canu  -p  sample -d 00_assemble_canu   genomeSize=160k  useGrid=false maxThreads=8 -pacbio-hifi  map_gene.fa

# Same assembly, but running the canu script from the pb-assembly environment
# through an explicitly chosen perl interpreter. Two read-type modes were tried.
/share/nas6/zhouxy/biosoft/perl/current/bin/perl  /share/nas1/yuj/software/micromamba/envs/pb-assembly/bin/canu -p  sample -d 00_assemble_canu genomeSize=160k corMhapSensitivity=high  corMinCoverage=0 useGrid=false maxThreads=8 -pacbio-hifi  map_gene.fa # HiFi mode: the assembly did not circularize

/share/nas6/zhouxy/biosoft/perl/current/bin/perl  /share/nas1/yuj/software/micromamba/envs/pb-assembly/bin/canu -p  sample -d 00_assemble_canu genomeSize=160k   corMhapSensitivity=high  corMinCoverage=0 useGrid=false maxThreads=8 -pacbio  map_gene.fa # after filtering the HiFi data, -pacbio mode produced a circular assembly

## Server-installed version
# Uses the canu-master build already present on the server, with the
# -pacbio-raw read type and assresult as the output directory.
/share/nas6/zhangxq/biosoft/canu-master/Linux-amd64/bin/canu  -p  sample -d assresult   genomeSize=160k  useGrid=false maxThreads=8 -pacbio-raw  map_gene.fa

2.2 校正测序数据

## v2.3
# Run only Canu's read-correction stage (-correct); prefix and output
# directory are both "correct".
# NOTE(review): genomeSize is 200k here but 160k in the assembly commands -- confirm which is intended.
/share/nas1/yuj/software/canu-2.3.Linux-amd64/canu-2.3/bin/canu -correct -p correct -d correct genomeSize=200k useGrid=false -pacbio-hifi map_gene.fa

/share/nas6/zhouxy/biosoft/perl/current/bin/perl  /share/nas1/yuj/software/micromamba/envs/pb-assembly/bin/canu -correct -p correct -d correct genomeSize=200k useGrid=false -pacbio map_gene.fa # this one ran OK

## Server-installed version
/share/nas6/zhangxq/biosoft/canu-master/Linux-amd64/bin/canu -correct -p correct -d correct genomeSize=200k useGrid=false -pacbio-raw map_gene.fa

2.3 环化组装结果

2.3.1 按 Canu 在序列头中标注的 trim 区间截取 contig(运行OK)

## v2.3
# Cut every contig down to the trim=START-END interval recorded in its
# FASTA header, writing the result next to the original contigs file.

# Resolve the contigs FASTA with a glob (no `ls` parsing); if several files
# match, the first one is used.
contigs=(00_assemble_canu/*.contigs.fasta)
contigs_fa=${contigs[0]}
# Assembly prefix: the file name without the .contigs.fasta suffix.
id=$(basename "$contigs_fa" .contigs.fasta)

# Build a 3-column, tab-separated BED file (name, start, end) from the header
# lines. The trim=START-END values are copied through unchanged. Headers that
# carry no trim= field are skipped instead of producing malformed rows.
awk '
  /^>/ {
    for (i = 2; i <= NF; i++) {
      if ($i ~ /^trim=[0-9]+-[0-9]+$/) {
        split(substr($i, 6), range, "-")   # drop the leading "trim="
        print substr($1, 2) "\t" range[1] "\t" range[2]
      }
    }
  }
' "$contigs_fa" > trim_bed.bed

# Extract the BED intervals from the contigs.
seqkit subseq --bed trim_bed.bed "$contigs_fa" > "00_assemble_canu/${id}.contigs.trimed.fasta"

2.3.2 使用 circlator 环化(运行报错)

# Install circlator into the active environment, pinning Python to 3.12 first.
# NOTE(review): this step is recorded as failing; the cause is not noted here.
mamba install -y python=3.12
mamba install -y circlator

# Run the complete circlator pipeline: <assembly FASTA> <reads FASTA> <output dir>.
# The reads path points into the output directory of the canu -correct run.
circlator all --verbose 00_assemble_canu/*.contigs.fasta correct/correct.correctedReads.fasta circlator_outdir

2.4 校正组装结果

参考:简书文章《使用nextpolish对三代组装进行polish(v1.2.2版)》

## Installation
# mamba install -c bioconda nextpolish

mamba activate pb-assembly
## Short-read (NGS) data is optional
# Write the absolute paths of the paired short-read files to sgs.fofn.
realpath ERR2173372_1.fastq ERR2173372_2.fastq  > sgs.fofn # one path per line
# Write the absolute path of the long-read file to nextpolish.lgs.fofn.
realpath unmapped.fastq   > nextpolish.lgs.fofn

# Copy the template run configuration into the working directory.
cp /share/nas1/yuj/software/nextpolish/nextpolish.run.cfg ./
编辑 ./nextpolish.run.cfg,按实际情况配置运行参数(例如 sgs.fofn、nextpolish.lgs.fofn 等输入文件的路径)
# Run NextPolish with the edited configuration file.
nextPolish  ./nextpolish.run.cfg
# Copy the polished genome out of the run directory.
# NOTE(review): assumes the config's work directory is nextpolish_rundir -- confirm in nextpolish.run.cfg.
cp nextpolish_rundir/genome.nextpolish.fasta ./
mamba deactivate